# Toggle min-max scaling of the continuous features
scale_data = True
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from math import ceil
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline
df = pd.read_excel('Covid May 2020 data.xlsx', sheet_name='Data')
df.tail()
|  | age | bmi | HbA1c_mmol_mol | has_asthma | has_high_blood_pressure | result |
|---|---|---|---|---|---|---|
| 10444 | 94 | 33.8 | 67 | 0 | 1 | died |
| 10445 | 90 | 23.3 | 100 | 0 | 1 | hospitalised |
| 10446 | 83 | 28.6 | 33 | 1 | 0 | died |
| 10447 | 31 | 18.2 | 80 | 0 | 0 | mild illness |
| 10448 | 26 | 26.9 | 40 | 1 | 0 | mild illness |
# Scale data
# Fit one MinMaxScaler per continuous column and return the fitted
# scalers, so the same training statistics can be reused on new data
# later (refitting on new data would scale it inconsistently).
def scale_dataframe(df, scalers=None):
    cols = ['age', 'bmi', 'HbA1c_mmol_mol']
    if scalers is None:
        scalers = {col: MinMaxScaler().fit(df[[col]]) for col in cols}
    for col in cols:
        df[col] = scalers[col].transform(df[[col]])
    return df, scalers
if scale_data:
    df, scalers = scale_dataframe(df)
df.head()
|  | age | bmi | HbA1c_mmol_mol | has_asthma | has_high_blood_pressure | result |
|---|---|---|---|---|---|---|
| 0 | 0.243902 | 0.263158 | 0.472222 | 1 | 0 | mild illness |
| 1 | 0.829268 | 0.257895 | 0.347222 | 0 | 0 | hospitalised |
| 2 | 0.451220 | 0.600000 | 0.736111 | 0 | 1 | hospitalised |
| 3 | 0.719512 | 0.110526 | 0.444444 | 0 | 1 | died |
| 4 | 0.060976 | 0.963158 | 0.625000 | 0 | 0 | mild illness |
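# Min-max scaling maps x to (x - min) / (max - min). The fitted scalers
# store those statistics, which can be inspected (a quick check, assuming
# scale_data is True so the scalers dict above exists):
for col, scaler in scalers.items():
    print(f'{col}: min={scaler.data_min_[0]}, max={scaler.data_max_[0]}')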
# Replace targets with ints
result_map = {'mild illness': 0, 'hospitalised': 1, 'died': 2}
result_map_reverse = {value: key for key, value in result_map.items()}
df['result'] = df['result'].replace(result_map)
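# The three outcome classes are unlikely to be perfectly balanced; a quick
# check of the class distribution before training:
print(df['result'].value_counts())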
# Set X, y and convert them to np arrays
X = df.drop('result', axis=1)
feature_names = X.columns.tolist()  # keep the feature names for reporting later
num_cols = X.shape[1]
y = df['result']
num_outputs = y.nunique()
X = X.values
y = y.values
# Hidden-layer size heuristic: the mean of input and output widths,
# here ceil((5 + 3) / 2) = 4 units
h1_layers = ceil((num_cols + num_outputs)/2)
# Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=41)
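# With three uneven outcome classes, a stratified split keeps the class
# ratios the same in train and test. An optional variant (not used for
# the results below):
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=41, stratify=y)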
# Convert X features to float tensors
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
# Convert y labels to long tensors
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)
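# Quick sanity check of shapes and dtypes before training:
print(X_train.shape, X_train.dtype)  # (n_train, 5), torch.float32
print(y_train.shape, y_train.dtype)  # (n_train,), torch.int64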
class Model(nn.Module):
    def __init__(self, in_features=num_cols, h1=h1_layers, out_features=num_outputs):
        super().__init__()
        self.fc1 = nn.Linear(in_features, h1)
        self.out = nn.Linear(h1, out_features)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.out(x)  # raw logits; CrossEntropyLoss applies softmax internally
        return x
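# nn.Module provides a readable repr for free; printing an instance confirms
# the layer sizes the heuristic above produced:
print(Model())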
torch.manual_seed(49)
model = Model()
losses = []
epochs = 250
loss_function = nn.CrossEntropyLoss()
optimiser = torch.optim.Adam(model.parameters(), lr=0.01)
for i in range(epochs):
    y_pred = model(X_train)  # forward pass over the whole training set
    loss = loss_function(y_pred, y_train)  # compare logits with the y_train labels
    losses.append(loss.item())
    if i % 10 == 0:
        print(f'At epoch {i} loss was: {loss}')
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
At epoch 0 loss was: 1.1762192249298096
At epoch 10 loss was: 1.0961493253707886
At epoch 20 loss was: 1.0698022842407227
At epoch 30 loss was: 1.0202093124389648
At epoch 40 loss was: 0.9572418928146362
At epoch 50 loss was: 0.8754114508628845
At epoch 60 loss was: 0.7935022711753845
At epoch 70 loss was: 0.720582902431488
At epoch 80 loss was: 0.6599651575088501
At epoch 90 loss was: 0.6124850511550903
At epoch 100 loss was: 0.5760200023651123
At epoch 110 loss was: 0.5480552315711975
At epoch 120 loss was: 0.5266116857528687
At epoch 130 loss was: 0.510050356388092
At epoch 140 loss was: 0.49717631936073303
At epoch 150 loss was: 0.48710158467292786
At epoch 160 loss was: 0.47918781638145447
At epoch 170 loss was: 0.47297823429107666
At epoch 180 loss was: 0.4681414067745209
At epoch 190 loss was: 0.46439245343208313
At epoch 200 loss was: 0.46149691939353943
At epoch 210 loss was: 0.45926496386528015
At epoch 220 loss was: 0.45754602551460266
At epoch 230 loss was: 0.4562242329120636
At epoch 240 loss was: 0.4552067220211029
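# The loop above takes one full-batch gradient step per epoch, which is fine
# at this data size. For larger datasets, a mini-batch epoch with DataLoader
# is the usual alternative (a sketch only; not used for the results here):
from torch.utils.data import DataLoader, TensorDataset

def train_one_epoch(model, loss_function, optimiser, X, y, batch_size=64):
    loader = DataLoader(TensorDataset(X, y), batch_size=batch_size, shuffle=True)
    for xb, yb in loader:
        loss = loss_function(model(xb), yb)
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()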
plt.plot(range(epochs), losses)
plt.ylabel("Loss")
plt.xlabel('Epoch')
plt.show()
# Determine feature importance by permuting one column at a time and
# measuring how much test accuracy drops relative to the baseline
def permutation_feature_importance(model, X_test, y_test, n_iterations=10):
    baseline_accuracy = evaluate_accuracy(model, X_test, y_test)
    print(f'Accuracy (%): {baseline_accuracy*100:.2f}')
    feature_importances = np.zeros(X_test.shape[1])
    for i in range(X_test.shape[1]):
        accuracy_scores = []
        for _ in range(n_iterations):
            X_test_permuted = X_test.detach().clone()
            X_test_permuted[:, i] = X_test_permuted[:, i][torch.randperm(X_test.shape[0])]
            accuracy = evaluate_accuracy(model, X_test_permuted, y_test)
            accuracy_scores.append(accuracy)
        feature_importances[i] = baseline_accuracy - np.mean(accuracy_scores)
    return feature_importances
# Evaluate model (on test dataset)
def evaluate_accuracy(model, X_test, y_test):
    with torch.no_grad():  # gradients are needed for training but not for evaluation
        y_pred = model(X_test)
        predicted = y_pred.argmax(dim=1)  # convert logits to predicted class labels
        accuracy = (predicted == y_test).sum().item() / len(y_test)
    return accuracy
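# Overall accuracy can hide weak classes; a minimal confusion-matrix sketch
# in plain torch (rows = true class, columns = predicted class):
def confusion_matrix(model, X_test, y_test, n_classes=num_outputs):
    with torch.no_grad():
        predicted = model(X_test).argmax(dim=1)
    matrix = torch.zeros(n_classes, n_classes, dtype=torch.long)
    for t, p in zip(y_test, predicted):
        matrix[t, p] += 1
    return matrix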
feature_importances = permutation_feature_importance(model, X_test, y_test)
importance_dict = {}
for column, importance in zip(feature_names, feature_importances):
    print(f"'{column}' importance score: {importance}")
    importance_dict[column] = importance
Accuracy (%): 79.47
'age' importance score: 0.4310526315789474
'bmi' importance score: 0.13339712918660296
'HbA1c_mmol_mol' importance score: 0.002105263157894721
'has_asthma' importance score: 0.0011004784688994906
'has_high_blood_pressure' importance score: 0.0014354066985645675
# Create the bar chart
fig, ax = plt.subplots()
ax.bar(list(importance_dict.keys()), list(importance_dict.values()))
ax.set_ylabel("Importance Score")
plt.xticks(rotation=90)
plt.show()
df_new = pd.read_excel('Data to predict on.xlsx', sheet_name='Data')
df_new.tail()
|  | age | bmi | HbA1c_mmol_mol | has_asthma | has_high_blood_pressure |
|---|---|---|---|---|---|
| 1125 | 23 | 34.5 | 36 | 0 | 1 |
| 1126 | 46 | 28.8 | 94 | 0 | 0 |
| 1127 | 72 | 28.7 | 67 | 1 | 1 |
| 1128 | 91 | 19.8 | 36 | 0 | 1 |
| 1129 | 69 | 16.7 | 81 | 0 | 0 |
if scale_data:
    # Reuse the scalers fitted on the original data, so the new records are
    # scaled with the same min/max statistics rather than refitted on themselves
    df_new_for_model, _ = scale_dataframe(df_new.copy(), scalers)
else:
    df_new_for_model = df_new
predictions = []
for index, row in df_new_for_model.iterrows():
    new_person = torch.tensor([row['age'],
                               row['bmi'],
                               row['HbA1c_mmol_mol'],
                               row['has_asthma'],
                               row['has_high_blood_pressure']],
                              dtype=torch.float32)
    with torch.no_grad():
        pred = model(new_person)
    max_index = pred.argmax().item()
    result = result_map_reverse[max_index]
    predictions.append(result)
df_new['prediction'] = predictions
df_new.head()
|  | age | bmi | HbA1c_mmol_mol | has_asthma | has_high_blood_pressure | prediction |
|---|---|---|---|---|---|---|
| 0 | 30 | 32.6 | 51 | 1 | 0 | mild illness |
| 1 | 40 | 20.5 | 89 | 0 | 0 | mild illness |
| 2 | 94 | 21.0 | 34 | 1 | 0 | died |
| 3 | 79 | 21.7 | 36 | 0 | 1 | hospitalised |
| 4 | 33 | 32.5 | 40 | 0 | 1 | mild illness |
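# The row-by-row loop above is easy to read but slow for large files; the
# same predictions can be computed in one batched forward pass (a sketch,
# writing to a hypothetical 'prediction_batched' column for comparison):
features = torch.tensor(df_new_for_model[feature_names].values, dtype=torch.float32)
with torch.no_grad():
    batch_preds = model(features).argmax(dim=1)
df_new['prediction_batched'] = [result_map_reverse[i] for i in batch_preds.tolist()]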
torch.save(model.state_dict(), 'Covid Multi-Class.pt')
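# To reuse the saved weights later, rebuild the architecture and load the
# state dict (a minimal sketch):
loaded_model = Model()
loaded_model.load_state_dict(torch.load('Covid Multi-Class.pt'))
loaded_model.eval()  # switch to inference mode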